
* Project root directory. Every log, data, ADO and table location used in this
* do-file is built from this global, so it is the only machine-specific setting.
* To run on another machine, comment out the active line and enable the alternative.
global path = "C:\Users\creto\Desktop\chinese_students_2017\" 
*global path = "D:\Dropbox\chinese_students_2017"

/*
TO DO:
-pretrend trade graph
-postperiod trade graph (first stage)
-trend of chinese students in US overall in pre period
-table form

*/

* Start a fresh log, first closing any log left open by an earlier run
* (capture suppresses the error raised when no log is open).
capture log close
log using "$path/Logfiles/rotemberg_weights_01Aug2019.log", replace

* Session settings: empty memory, raise the matrix size limit, and do not
* pause output at each screenful.
clear
set matsize 10000
set more off

* Add the project ADO directory to the ado search path.
adopath+"$path/ADO"

/****************************************************************************/

****************       STEP 1: LOAD DATA     **************

/****************************************************************************/

	
	* Load the city-level trade panel. Note: this is the new data with corrected
	* prefecture aggregation for trade, students, and the NTR instrument.
	use "$path/Data/6.AnalysisData/data_merge10jul2019.dta", clear

		* Cross-tabulate year against the balanced flag, showing missing values
		* (431 cities in trade data), then drop rows with no year.
		tabulate year balanced, missing
		drop if year == .

		* Population is in units of 10,000 persons; convert to units of 1,000 persons.
		* The summaries before and after allow a visual check of the rescaling.
		summarize pop
		replace pop = 10 * pop
		summarize pop
*----------------------------------------------------------------------------*			
/****************************************************************************/

**************       STEP 2: VARIABLE CONSTRUCTION       *********************

******************************************************************************
*----------------------------------------------------------------------------*			
	


	* Natural log of every level variable in the list, stored as ln_<name>.
	* Non-positive or missing inputs produce missing values.
		local loglist emp dvalue dvalue_processing gdp wid inv_realest inv_fdi ///
					num_college num_midschool libbook rideship numstd_college numstd_midschool ///
					numfcty_college numfcty_midschool num1body_c num1body_cp num1title_c num1title_cp ///
					num2title_c num2title_cp num2body_c num2body_cp ///
					tot_amtfunds_personal tot_amtfunds_other tot_amtfunds_total ///
					tot_amtcost_tuition tot_amtcost_living tot_amtcost_other tot_amtcost_total

		foreach v of local loglist {
			generate ln_`v' = ln(`v')
		}
	
	**CALCULATE DEPENDENT VARIABLE FOR FIRST STAGE - CHANGE IN ACTUAL EXPORTS
	* For each variable in the loop this creates, within citycode:
	*   pct_<v>_0413, pct_<v>_0113, pct_<v>_0013 : percent change to 2013 using lags of 9, 12 and 13 rows
	*   pct_<v>                                  : row-over-row percent change for years after 2000
	*   lnc_<v>_0413, lnc_<v>_0113, lnc_<v>_0013 : the same windows as differences of ln_<v>
	*   lnc_<v>                                  : row-over-row log difference for years after 2000
	*   l1c_<v>_0013                             : 13-row difference of ln(v+1)
	* NOTE(review): lags are positional (_n-k within citycode after sorting by year),
	* so they hit the intended base year only if each city has exactly one row per
	* consecutive year - confirm against the panel structure.
	
		sort citycode year
		foreach x in dvalue dvalue_processing gdp wid {  // Any other variables??
		
			bys citycode: gen pct_`x'_0413 = (`x' - `x'[_n-9])/`x'[_n-9] if year==2013	
			bys citycode: gen pct_`x'_0113 = (`x' - `x'[_n-12])/`x'[_n-12] if year==2013	
			* FIX: the denominator previously used the 12-row lag while the numerator
			* used the 13-row lag; both now use the same base row.
			bys citycode: gen pct_`x'_0013 = (`x' - `x'[_n-13])/`x'[_n-13] if year==2013	
			bys citycode: gen pct_`x' = (`x' - `x'[_n-1])/`x'[_n-1] if year>2000

			
			bys citycode: gen lnc_`x'_0413 = (ln_`x' - ln_`x'[_n-9]) if year==2013
			bys citycode: gen lnc_`x'_0113 = (ln_`x' - ln_`x'[_n-12]) if year==2013
			bys citycode: gen lnc_`x'_0013 = (ln_`x' - ln_`x'[_n-13]) if year==2013
			bys citycode: gen lnc_`x' = (ln_`x' - ln_`x'[_n-1]) if year>2000
			
			bys citycode: gen l1c_`x'_0013 = (ln(`x'+1) - ln(`x'[_n-13]+1)) if year==2013 //log + 1
	
		}
	
	
		

	* Student outcome variables. Note: students can only be measured 04-13.

		* Rename stud_3 ... stud_6 to the as, ba, ma, dr suffixes used in the loop below.
		rename (stud_3 stud_4 stud_5 stud_6) (stud_as stud_ba stud_ma stud_dr)

		* How many cities with population in 2004? Answer is 280
		*codebook citycode if year==2004 & pop!=.

		sort citycode year
		foreach deg in as ba ma dr tot {

			* Current value and the value 9 rows earlier within the city
			local now  stud_`deg'
			local base stud_`deg'[_n-9]

			* Log change. Original note says 1 needs to be added; no +1 is applied
			* here, so a zero count on either end gives a missing value.
			bysort citycode: generate lnc_stud_`deg'_0413 = ln(`now') - ln(`base') if year==2013

			* Change in the student count divided by population 9 rows earlier
			bysort citycode: generate shc_stud_`deg'_0413 = (`now' - `base') / (pop[_n-9]) if year==2013

			* Inverse hyperbolic sine change, written out as ln(s + (s^2+1)^0.5)
			bysort citycode: generate isc_stud_`deg'_0413 = ln(`now'+((`now'^2+1)^0.5)) - ln(`base'+(((`base')^2+1)^0.5)) if year==2013

		}

		

	* Regression weights from employment and population, taken 10 rows before
	* the 2013 row within each city (original note: use 2003 values).
	* Only the 2013 row is used in the regressions, so the weights are stored there.
		sort citycode year
		foreach base in emp pop {
			bysort citycode: generate `base'wt03 = `base'[_n-10] if year==2013
		}
		
		

	
	* Tidy up: give the instruments a common iv_ prefix and label them.
		rename ntrgap_exw97  iv_ntr
		rename mfa2001_exw97 iv_mfa

		* The log changes of wid over each window are renamed as instruments
		foreach span in 0413 0113 0013 {
			rename lnc_wid_`span' iv_wid_`span'
		}

		label variable iv_mfa "MFA Instrument"
		label variable iv_ntr "\$PNTR_{c}\$"
		
				
				

	
	
	
		
//BARTIK CHECKS FOR NTR GAP IV USING 1997 EXPORT SHARES
	
	
	*keep only vars and analysis sample needed
	keep year balanced citycode chinacity popwt03 contract_cons_exw97 fraction_nfirm_exw97 share_revenue_exw97 contract_liberal_exw97 lnc_dvalue_0413 shc_stud_tot_0413 iv*
	keep if year==2013 & balanced==1
	*display the number of distinct chinacity values left in the sample
	qui tab chinacity
	di r(r)
	
	*merge in WIDE city employment shares and NTR gaps by industry 
	*only cities present in both files are retained
	merge 1:1 chinacity using "$path/Data/6.AnalysisData/wide/expshare97_NTR_wide_pref_rev"
	keep if _merge==3
	
	
	
	*settings for the Rotemberg-weight step that follows
	*ind_stub is the variable-name prefix stripped off when labelling industries
	local ind_stub beta_pi
	local growth_stub ntrgap
	*FIX: was shc_stud_tot0413, which does not match the variable kept above
	local y shc_stud_tot_0413
	local x lnc_dvalue_0413
	local weight ""
	local controls ""
	

	local time_var year
	local cluster_var citycode

	
*ROTEMBERG WEIGHTS FOR NTR
*Runs bartik_weight on the merged sample, turns the returned matrices into a
*dataset with one row per beta_pi variable, lists the ten largest alpha values
*and writes the table to a csv file. The analysis data are restored afterwards.
preserve	
	bartik_weight , z(beta_pi*) weightstub(ntrgap*) x(lnc_dvalue_0413) y(shc_stud_tot_0413) 


	*copy the returned matrices before another r-class command overwrites r()
	mat beta = r(beta)
	mat alpha = r(alpha)
	mat G = r(G)
	qui desc beta_pi*, varlist
	*FIX: copy the list by macro expansion. The previous form, which assigned
	*r(varlist) with an equals sign, evaluates a string expression and can
	*truncate a long variable list.
	local varlist `r(varlist)'

	*replace the data in memory with the matrix contents (beta1, alpha1, G1)
	clear
	svmat beta
	svmat alpha
	svmat G

	*label each row with the part of the variable name after the stub
	*NOTE(review): assumes matrix rows follow the order of the beta_pi variables - confirm
	qui gen ind = ""
	*qui gen year = ""
	local t = 1
	foreach var in `varlist' {
		if regexm("`var'", "`ind_stub'(.*)") {
			*qui replace year = regexs(1) if _n == `t'
			qui replace ind = regexs(1) if _n == `t'
			}
		local t = `t' + 1
		}


	
	
	
	
	**INSPECT HIGHEST ROTEMBERG WEIGHT INDUSTRIES
	gsort -alpha1
	
	li ind alpha1 in 1/10
	
	outsheet using "$path/Tables/rotembergweights.csv", comma replace
restore	
	